library(ggplot2)
library(here)

############################
# WEATHER PLACEBO
############################

# Load the State of the Union date table. The basic plots below do not use it;
# it is consumed later when the Google speech pulls are filtered by date.
setwd(here("release_data", "SOTU"))
dates <- read.csv("sotu_dates_new.csv")

######################
######################
# BASIC PLOTS
######################
######################

# The basic-plot inputs are read relative to release_data/.
setwd(here("release_data"))

####### FIGURE A5 - GOOGLE ########
gt_weather <- read.csv("gt_weather.csv")
gt_weather$date <- as.Date(gt_weather$date)

# Tag every observation with an administration label using calendar-year
# cutoffs: before 2009 is "Bush", 2017 onward is "Trump", everything else
# stays "Obama". which() skips rows whose date is NA, so they keep the default.
bush_cutoff <- as.Date("2009-01-01")
trump_cutoff <- as.Date("2017-01-01")
gt_weather$admin <- "Obama"
gt_weather$admin[which(gt_weather$date < bush_cutoff)] <- "Bush"
gt_weather$admin[which(gt_weather$date >= trump_cutoff)] <- "Trump"

# Scatter of the `weather` column over time with one linear fit per label.
ggplot(gt_weather, aes(x = date, y = weather, color = admin)) +
  geom_point(alpha = 0.3) +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  xlab(NULL) +
  ylab("Google Trends") +
  stat_smooth(method = "lm", se = FALSE)

####### FIGURE A5 - BING ########

# Bing weather-search series, read relative to the current working directory.
# The `date`, `pct` and `admin` columns are taken as-is from the CSV.
w <- read.csv("weather_proportions.csv")
w$date <- as.Date(w$date)

# Points plus one linear fit per `admin` value on a log10 y axis; `pct` is
# divided by 100 before plotting.
fig_a5_bing <- ggplot(w, aes(x = date, y = pct / 100, color = admin)) +
  geom_point(alpha = 0.1) +
  stat_smooth(method = "lm", se = FALSE) +
  scale_y_continuous(trans = "log10") +
  ylab("% of Bing Searches\n(Logged)") +
  xlab(NULL) +
  theme(legend.title = element_blank(), legend.position = "bottom")
fig_a5_bing

######################
######################
# SPEECH PLOTS - BING
######################
######################


###### FIGURE A6 - BING #######
setwd(here("release_data"))

w_speech <- read.csv("weather_speech_proportions.csv")

# Mean `pct` for every speech x hour combination. Naming the inputs here
# yields the speech / hour / pct columns directly.
w_speech_agg <- aggregate(
  x = data.frame(pct = w_speech$pct),
  by = list(speech = w_speech$speech, hour = w_speech$hour),
  FUN = mean
)

# One point per speech-hour cell; the dashed line marks hour 21.
speech_palette <- c("grey10", "grey50", "firebrick3")
fig_a6_bing <- ggplot(w_speech_agg, aes(x = hour, y = pct, color = speech)) +
  geom_point() +
  scale_color_manual(values = speech_palette) +
  geom_vline(xintercept = 21, linetype = "dashed") +
  xlab(NULL) +
  ylab("% Searches") +
  theme(legend.position = "bottom", legend.title = element_blank())
fig_a6_bing

###### FIGURE A6 - GOOGLE #######

# Read every file in release_data/SOTU/weather and stack them into `full`.
# Each file's rows are tagged with `pull`, the file's position in the list
# ordered by the digits found in its name.
setwd(here("release_data", "SOTU", "weather"))

files <- list.files()

# Fail with a clear message instead of the obscure error that 1:length()
# produces when the directory is empty.
if (length(files) == 0) {
  stop("No pull files found in ", getwd(), call. = FALSE)
}

# The digits embedded in each file name define the pull order.
fl <- data.frame(files = files, num = as.numeric(gsub("\\D", "", files)))
fl <- fl[order(fl$num), ]

# Read each file once and bind them in a single step rather than growing the
# data frame with rbind() on every iteration.
pull_files <- as.character(fl$files)
pulls <- lapply(seq_along(pull_files), function(i) {
  tmp <- read.csv(pull_files[i])
  tmp$pull <- i
  tmp
})
full <- do.call(rbind, pulls)


# Append a Date version of the `date` column, then rename all five columns by
# position. After this, `date` refers to the Date column appended here.
# NOTE(review): the positional rename assumes each pull file contributes
# exactly three columns with the timestamp first - confirm against the CSVs.
full$date2 <- as.Date(full$date)

names(full) <- c("hour", "immigr", "partial", "pull", "date")


#analysis
# Pulls 1-18 cycle through the same six events three times (1-6, 7-12, 13-18),
# so the label depends only on the pull's position within its round of six.
# Any pull outside 1-18 keeps the fallback label.
speech_cycle <- c(
  "Trump 2019 speech",
  "Trump 2019 sotu",
  "Trump 2018 sotu",
  "Trump 2017 sotu",
  "Obama 2016 sotu",
  "Obama 2015 sotu"
)
full$speech <- "Obama sotu 15"
labelled <- which(full$pull %in% 1:18)
full$speech[labelled] <- speech_cycle[(full$pull[labelled] - 1) %% 6 + 1]

#drop the 2019 immigration speech
#full <- full[full$speech!="Trump 2019 speech",]

#convert times

# Parse the pull timestamps as UTC and re-express them in EST. Setting the
# "tzone" attribute changes only how the instants are displayed, which is what
# lubridate::with_tz() does; doing it in base R avoids calling a package that
# this script never attaches.
hours <- as.POSIXct(full$hour, tz = "UTC")
attr(hours, "tzone") <- "EST"

full$est_hours <- as.character(hours)
full$est_date <- as.Date(full$est_hours)

# get the dates we want

# Base-R stand-in for stringr::str_extract(): the first match of `pattern` in
# each element of `x`, or NA where there is no match.
first_match <- function(x, pattern) {
  m <- regexpr(pattern, x, perl = TRUE)
  out <- rep(NA_character_, length(x))
  hit <- !is.na(m) & m > -1L
  out[hit] <- regmatches(x, m)
  out
}

# Base-R stand-in for stringr::str_pad(width = 2, side = "left", pad = "0")
# applied to whole numbers; NA stays NA.
pad2 <- function(n) {
  out <- rep(NA_character_, length(n))
  ok <- !is.na(n)
  out[ok] <- sprintf("%02d", as.integer(n[ok]))
  out
}

# Split `sotu_date` into leading digits (month), the digits between the first
# pair of slashes (day) and trailing digits (year), then rebuild it as
# yyyy-mm-dd. NOTE(review): this presumes an m/d/yyyy layout - confirm against
# sotu_dates_new.csv.
dates$sotu_date <- as.character(dates$sotu_date)
dates$mo <- pad2(as.numeric(first_match(dates$sotu_date, "^\\d+")))
dates$day <- pad2(
  as.numeric(gsub("/", "", first_match(dates$sotu_date, "/\\d+/")))
)
dates$yr <- as.numeric(first_match(dates$sotu_date, "\\d+$"))

dates$date <- paste0(dates$yr, "-", dates$mo, "-", dates$day)

# Keep only the rows whose EST calendar date appears in the date table.
full <- full[full$est_date %in% as.Date(dates$date), ]

# Hour of day (0-23, EST) for every row, taken from the row's own timestamp.
# Recycling 0:23 down the column is only correct when the retained rows form
# complete, ordered 24-hour days; with any gap or partial day it either errors
# or silently assigns the wrong hour.
full$num_hour <- as.integer(
  format(as.POSIXct(full$hour, tz = "UTC"), "%H", tz = "EST")
)

# Flag the 21:00 and 22:00 hours as speech time.
full$speech_time <- ifelse(full$num_hour %in% c(21, 22), 1, 0)

#immigration oval office speech lasted less than one hour
full$speech_time[full$speech == "Trump 2019 speech" & full$num_hour == 22] <- 0

# Rows dated after 2017-01-20 are labelled "Trump", all others "Obama".
full$admin <- ifelse(full$date > as.Date("2017-01-20"), "Trump", "Obama")

#look at means

# Pulls come in three rounds of six (1-6, 7-12, 13-18), matching the speech
# labels assigned earlier in this file. Round one is the speech date itself.
full$speech_date <- ifelse(full$pull < 7, 1, 0)
full$before_after <- "1 week-pre"
full$before_after[full$pull < 7] <- "Speech date"
# Pull 12 closes the second round, so the third round starts at 13. The
# earlier `>= 12` cutoff moved pull 12 into the post-week group.
full$before_after[full$pull > 12] <- "1 week-post"

# Mean of `immigr` by admin, speech-time flag and speech-date flag. This table
# is not used by the plot below.
agg <- aggregate(
  full$immigr,
  list(full$admin, full$speech_time, full$speech_date),
  mean
)
colnames(agg) <- c("admin", "speech_time", "speech_date", "mean")

# Hourly mean of `immigr` by admin and pre / speech / post window.
agg2 <- aggregate(
  full$immigr,
  list(full$admin, full$num_hour, full$before_after),
  mean
)

colnames(agg2) <- c("admin", "num_hour", "speech_date", "mean")

# Ordered factor so facets and colours run pre -> speech date -> post.
agg2$speech_date <- factor(
  agg2$speech_date,
  levels = c("1 week-pre", "Speech date", "1 week-post"),
  ordered = TRUE
)

# One panel per window x admin; the dashed line marks hour 21.
ggplot(agg2, aes(x = num_hour, y = mean, color = factor(speech_date))) +
  geom_point() +
  facet_wrap(speech_date ~ admin, ncol = 2) +
  theme(legend.position = "bottom", legend.title = element_blank()) +
  scale_color_manual(values = c("grey50", "firebrick3", "grey10")) +
  ylab("Google Trends") +
  geom_vline(xintercept = 21, linetype = "dashed") +
  xlab("Hour (EST)")
